c("GO:0005794","Golgi apparatus", 0.969, 3.801, 1.102, 4.979,-14.7382,0.473,0.426),
c("GO:0035770","ribonucleoprotein granule", 0.131, 0.449, 4.702, 4.109,-8.8682,0.514,0.430),
c("GO:1990234","transferase complex", 1.223,-4.697, 4.920, 5.080,-37.0791,0.735,0.439),
c("GO:0098687","chromosomal region", 0.306, 4.664, 4.870, 4.479,-7.4754,0.513,0.467),
c("GO:0044429","mitochondrial part", 1.203, 4.417, 3.065, 5.073,-14.5545,0.464,0.481),
c("GO:0098796","membrane protein complex", 2.473,-4.957, 3.573, 5.386,-7.0498,0.762,0.484),
c("GO:0015630","microtubule cytoskeleton", 0.900, 3.840, 5.946, 4.947,-13.3919,0.507,0.523),
c("GO:1990904","ribonucleoprotein complex", 5.291,-4.877, 4.062, 5.717,-17.3005,0.757,0.543),
c("GO:0032838","cell projection cytoplasm", 0.014, 5.995,-2.409, 3.136,-7.9594,0.678,0.601),
c("GO:0000151","ubiquitin ligase complex", 0.232,-3.386, 5.040, 4.358,-17.1811,0.616,0.604),
c("GO:0005681","spliceosomal complex", 0.250, 1.287, 3.948, 4.392,-13.3503,0.415,0.628),
c("GO:0005768","endosome", 0.319, 4.073, 0.568, 4.497,-9.2019,0.502,0.669),
c("GO:0031248","protein acetyltransferase complex", 0.152,-3.634, 5.450, 4.175,-7.6483,0.625,0.693),
c("GO:0031461","cullin-RING ubiquitin ligase complex", 0.159,-3.711, 4.837, 4.195,-9.0096,0.624,0.695),
c("GO:0044441","ciliary part", 0.139, 6.193, 2.393, 4.137,-7.8004,0.532,0.711),
c("GO:0044431","Golgi apparatus part", 0.608, 4.218, 1.796, 4.777,-12.2954,0.421,0.713),
c("GO:0016607","nuclear speck", 0.091, 5.789, 4.217, 3.953,-18.0175,0.515,0.722),
c("GO:0042175","nuclear outer membrane-endoplasmic reticulum membrane network", 0.771, 3.000,-4.031, 4.880,-9.4714,0.680,0.731),
c("GO:0005813","centrosome", 0.185, 4.677, 5.418, 4.261,-10.0874,0.483,0.747),
c("GO:0005730","nucleolus", 0.664, 4.033, 4.209, 4.815,-7.3250,0.436,0.759),
c("GO:0016604","nuclear body", 0.189, 5.408, 4.071, 4.269,-29.9712,0.494,0.770),
c("GO:0044440","endosomal part", 0.124, 4.634, 1.152, 4.087,-8.8982,0.473,0.785),
c("GO:0005815","microtubule organizing center", 0.350, 4.227, 5.138, 4.537,-13.0272,0.463,0.792),
c("GO:0005740","mitochondrial envelope", 0.901, 4.737, 3.010, 4.948,-10.1093,0.474,0.822),
c("GO:0000139","Golgi membrane", 0.403, 4.414, 1.656, 4.598,-12.3780,0.432,0.865));
one.data <- data.frame(revigo.data);
names(one.data) <- revigo.names;
one.data <- one.data [(one.data$plot_X != "null" & one.data$plot_Y != "null"), ];
one.data$plot_X <- as.numeric( as.character(one.data$plot_X) );
one.data$plot_Y <- as.numeric( as.character(one.data$plot_Y) );
one.data$plot_size <- as.numeric( as.character(one.data$plot_size) );
one.data$log10_p_value <- as.numeric( as.character(one.data$log10_p_value) );
one.data$frequency <- as.numeric( as.character(one.data$frequency) );
one.data$uniqueness <- as.numeric( as.character(one.data$uniqueness) );
one.data$dispensability <- as.numeric( as.character(one.data$dispensability) );
#head(one.data);
# --------------------------------------------------------------------------
# Names of the axes, sizes of the numbers and letters, names of the columns,
# etc. can be changed below
p1 <- ggplot( data = one.data );
# changed plot size to be smaller!
p1 <- p1 + geom_point( aes( plot_X, plot_Y, colour = log10_p_value, size = (plot_size/2)), alpha = I(0.6) ) + scale_size_area();
p1 <- p1 + scale_colour_gradientn( colours = c("blue", "green", "yellow", "red"), limits = c( min(one.data$log10_p_value), 0) );
# p1 <- p1 + geom_point( aes(plot_X, plot_Y, size = plot_size), shape = 21, fill = "transparent", colour = I (alpha ("black", 0.6) )) + scale_size_area();
p1 <- p1 + scale_size( range=c(5, 30)) + theme_bw(); # + scale_fill_gradientn(colours = heat_hcl(7), limits = c(-300, 0) );
ex <- one.data [ one.data$dispensability < 0.15, ];
# 4.20.2020: selected terms to include based on interest (first 3) and based on dispensability (last 5)
terms.include <- c('mitochondrion', 'ubiquitin ligase complex', 'spliceosomal complex', 'nucleoplasm part', 'catalytic complex', 'whole membrane', 'neuron part', 'envelope')
ex <- subset(one.data, description %in% terms.include)
p1 <- p1 + geom_text( data = ex, aes(plot_X, plot_Y, label = description), colour = I(alpha("black", 0.85)), size = 3 );
p1 <- p1 + labs (y = "semantic space x", x = "semantic space y");
p1 <- p1 + theme(legend.key = element_blank()) ;
one.x_range = max(one.data$plot_X) - min(one.data$plot_X);
one.y_range = max(one.data$plot_Y) - min(one.data$plot_Y);
p1 <- p1 + xlim(min(one.data$plot_X)-one.x_range/10,max(one.data$plot_X)+one.x_range/10);
p1 <- p1 + ylim(min(one.data$plot_Y)-one.y_range/10,max(one.data$plot_Y)+one.y_range/10);
# added to ex for readability:
terms.include2 <- c('nuclear outer membrane-endoplasmic reticulum membrane network', 'cell projection cytoplasm', 'endosome', 'cilium')
ex2 <- subset(one.data, description %in% terms.include2)
p1 <- p1 + geom_text( data = ex2, aes(plot_X, plot_Y, label = description), colour = I(alpha("black", 0.85)), size = 3 );
# remove the legend size and change color title....
p1 + guides(size = F) + labs(col = '-log 10 P-value') + theme_classic()
library(knitr)
library(tidyverse)
library(broom)
library(readxl)
library(lme4)
library(splines)
library(magrittr)
library(data.table)
library(readxl)
library(htmlTable)
library(yangR)
library(analyzeR)
library(patchwork)
library(ggplot2)
library(ggfortify)
knitr::opts_chunk$set(echo = FALSE, cache = TRUE, warning = FALSE)
sessionInfo()
df = readRDS('/Volumes/JHPCE/dcs01/active/projects/mesa/syang/R_objects/full_metab_with_pheno.rds')
# cn.data = readRDS('/Volumes/JHPCE/dcs01/active/projects/Monochrome/Full_Data/all.data.rds')
cn.data = readRDS('/Volumes/JHPCE/dcs01/active/projects/Monochrome/Full_Data/all.data.filter.rds')
missing = df$IDNO[which(df$IDNO %!in% cn.data$IDNO)]
shiplist = read_excel('/Volumes/JHPCE/dcs01/active/projects/mesa/Arking BL and Ex 5 MESA DNA Shipment List 8718.xlsx')
which(missing %!in% shiplist$IDNO)
missed = subset(shiplist, IDNO %in% missing)
cn.data$Exam[which(cn.data$Exam == 'BL')] <- '1'
all.data = merge(cn.data, df)
print(nrow(df))
print(nrow(all.data))
table(all.data$Exam)
lm(inv.norm.transform(N.Acetyl.L.Alanine) ~ age + bmi + as.factor(sex) + as.factor(site) + race + idno, data = df) %>% summary %>% coef -> acet
lm(inv.norm.transform(Citrulline) ~ age + bmi + as.factor(sex) + as.factor(site) + race + idno, data = df) %>% summary %>% coef -> citr
lm(inv.norm.transform(Kynurenine) ~ age + bmi + as.factor(sex) + as.factor(site) + race + idno, data = df) %>% summary %>% coef -> kyn
my.estimates = as.data.frame(rbind(acet[2,], citr[2,], kyn[2,]))
rownames(my.estimates) = c('N-Acetyl-Alanine', 'Citrulline', 'Kynurenine')
my.estimates = my.estimates[,c(1,2,4)]
menni.estimates = data.frame(Beta = c(0.017, 0.025, 0.018), SE = c(0.0009, 0.001, 0.001), P = c(5.8e-68, 1.06e-115, 8.93e-78))
both = cbind(my.estimates, menni.estimates)
both$Estimate = formatbeta(both$Estimate)
both$`Std. Error` = formatbeta(both$`Std. Error`)
both$`Pr(>|t|)` = formatpval(both$`Pr(>|t|)`)
headers = c('Estimate', 'SE', 'Pval')
htmlTable(both, cgroup = c('MESA', "Menni"), n.cgroup = c(3,3), header = paste0('&nbsp;', rep(headers, 2), '&nbsp;'), tfoot="&dagger; MESA only had N-Acetyl-L-Alanine, whereas Menni doesn't distinguish between isomers")
df = all.data
df$sex = as.factor(df$sex)
df$site = as.factor(df$site)
df$Exam = as.factor(df$Exam)
df$idno = as.factor(df$idno)
lm(deltaCT ~ sex + as.factor(site) + ns(age, df = 2) + Exam, data = df) %>% summary
df$mtDNA = scale(resid(lmer(deltaCT ~ sex + as.factor(site) + ns(age, df = 2) + Exam + (1|idno), data = df)))
lmer(deltaCT ~ sex + as.factor(site) + ns(age, df = 2) + Exam + (1|idno), data = df) %>% summary
# looks kind of normal, a little negative shifted
ggplot(df, aes(deltaCT)) + geom_density() + ggtitle('Unadjusted density plot') + xlab('Unadjusted mtDNA metric')
ggplot(df, aes(mtDNA)) + geom_density() + ggtitle('Adjusted density plot') + xlab('Adjusted mtDNA metric')
# functions for running:
run_lmer <- function(expr, cov, rcov, SCORE, omit.outlier = T, outlier_sd = 3) {
expr <- as.numeric(expr)
# expr <- scale(expr) # uncomment if you would like to scale expression
# expr <- inv.norm.transform(expr) # uncomment if you would like to inverse normal transform expression
# Create dataframe
expr_cov <- cbind(SCORE, expr, cov, rcov)
# Create equation
lmer.form = paste0('expr~SCORE+',paste0(colnames(cov), collapse = '+'), '+', paste0('(1|',colnames(rcov), ')', collapse = '+'))
# Run mixed model
lmer.fit <- lmer(lmer.form, data = expr_cov, na.action = na.exclude)
# Get summary
lmer.fit.summary <- summary(lmer.fit)
# Get corr
cor_expr_score <-
cor(expr, SCORE)
# Capture beta, tval, standard error.
lmer.res_ <-
as.data.frame(t(coef(lmer.fit.summary)['SCORE',]))
# get pval christina's way:
pval <- pchisq(lmer.res_$`t value`^2,1,lower.tail=F)
lmer.res_ <-
cbind(lmer.res_, pval,
#      t(confint(lmer.fit)['SCORE', ]),
cor_expr_score)
return(as.matrix(lmer.res_))
}
run.all.lmers <- function(tx_expr, cov, rcov, gene.ids, SCORE, omit.outlier = T, num.cores = 10)
{
require(pbapply)
pboptions(type="txt")
require(lme4)
start = Sys.time()
lmer.res <-
pblapply(tx_expr,            # Expression vector list for `pbapply::pblapply`
run_lmer,             # This function
cov = cov,           # Covariate matrix (fixed effects), as desribed above
rcov = rcov,        # Covariates that are random varaibles
SCORE = SCORE,       # PRS
omit.outlier = omit.outlier,
cl = num.cores)      # Number of cores to parallelize over
end = Sys.time()
total.time.confint = end-start
total.time.confint
# total.time.noconfint = end-start
lmer.res <- simplify2array(lmer.res, higher=F)
rownames(lmer.res) <-
c(# 'intercept',
'beta',
'SE',
't_value',
'pval',
# 'conf.low',
# 'conf.high',
'corr.rho')
colnames(lmer.res) <- gene.ids
lmer.res <- as.data.frame(t(lmer.res))
return(lmer.res)
}
metablist = readRDS('/Volumes/JHPCE/dcs01/active/projects/mesa/syang/R_objects/mesa.metab.list.rds')
v1 = subset(df, Exam == 1)
metablist = readRDS('/Volumes/JHPCE/dcs01/active/projects/mesa/syang/R_objects/mesa.metab.list.rds')
v1 = subset(df, Exam == 1)
tx_expr <- v1[,which(colnames(v1) %in% make.names(metablist))]
cov <- v1[,which(colnames(v1) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race1c', 'site'))]
SCORE <- v1[,which(colnames(v1) == "mtDNA")]
lm.res = run.all.lms(tx_expr, cov, make.names(metablist), SCORE)
care = dplyr::select(lm.res, beta, SE, pval)
care = care[order(care$pval),]
care %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show
rownames(show) = rownames(care)
htmlTable(head(show, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show), '&emsp;&emsp;'))
pval_qqplot(care$pval, xlim = 3, ylim = 3, title = '')
cov <- v1[,which(colnames(v1) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race', 'site', 'bmi'))]
lm.res = run.all.lms(tx_expr, cov, make.names(metablist), SCORE)
care = dplyr::select(lm.res, beta, SE, pval)
care = care[order(care$pval),]
care %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show
rownames(show) = rownames(care)
htmlTable(head(show, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show), '&emsp;&emsp;'))
pval_qqplot(care$pval, xlim = 3, ylim = 3, title = '')
Oxalic.Acid.adj = scale(resid(lm(Oxalic.acid ~ sex + age + PC1 + PC2 + PC3 + race + site, data = all.data)))
Glyceric.Acid.adj = scale(resid(lm(Glyceric.acid ~ sex + age + PC1 + PC2 + PC3 + race + site, data = all.data)))
avp = data.frame(mtDNA = v1$mtDNA, Oxalic.Acid.adj = Oxalic.Acid.adj, Glyceric.Acid.adj = Glyceric.Acid.adj)
Oxalic.Acid.adj = scale(resid(lm(Oxalic.acid ~ sex + age + PC1 + PC2 + PC3 + race + site, data = v1)))
Glyceric.Acid.adj = scale(resid(lm(Glyceric.acid ~ sex + age + PC1 + PC2 + PC3 + race + site, data = v1)))
avp = data.frame(mtDNA = v1$mtDNA, Oxalic.Acid.adj = Oxalic.Acid.adj, Glyceric.Acid.adj = Glyceric.Acid.adj)
oxal = ggplot(avp, aes(scale(mtDNA), Oxalic.Acid.adj)) + geom_point()
oxal = ggplot(avp, aes(scale(mtDNA), Oxalic.Acid.adj)) + geom_point()
glyc = ggplot(avp, aes(scale(mtDNA), Glyceric.Acid.adj)) + geom_point()
oxal + geom_smooth(method = 'lm')
glyc + geom_smooth(method = 'lm')
oxal + geom_smooth(method = 'lm')
v5 = subset(df, Exam == 5)
v5 = subset(df, Exam == 5)
tx_expr <- v5[,which(colnames(v5) %in% make.names(metablist))]
cov <- v5[,which(colnames(v5) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race1c.y', 'site'))]
SCORE <- v5[,which(colnames(v5) == "mtDNA")]
lm.res = run.all.lms(tx_expr, cov, make.names(metablist), SCORE)
care = dplyr::select(lm.res, beta, SE, pval)
care = care[order(care$pval),]
care %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show
rownames(show) = rownames(care)
htmlTable(head(show, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show), '&emsp;&emsp;'))
pval_qqplot(care$pval, xlim = 5, ylim = 5, title = '')
v5 = subset(df, Exam == 5)
tx_expr <- v5[,which(colnames(v5) %in% make.names(metablist))]
cov <- v5[,which(colnames(v5) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race1c.y', 'site'))]
SCORE <- v5[,which(colnames(v5) == "mtDNA")]
lm.res = run.all.lms(tx_expr, cov, make.names(metablist), SCORE)
care = dplyr::select(lm.res, beta, SE, pval)
care = care[order(care$pval),]
care %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show
rownames(show) = rownames(care)
htmlTable(head(show, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show), '&emsp;&emsp;'))
pval_qqplot(care$pval, xlim = 5, ylim = 5, title = '')
cov <- v5[,which(colnames(v5) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race1c.y', 'site', 'bmi'))]
lm.res = run.all.lms(tx_expr, cov, make.names(metablist), SCORE)
care = dplyr::select(lm.res, beta, SE, pval)
care = care[order(care$pval),]
care %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show
rownames(show) = rownames(care)
htmlTable(head(show, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show), '&emsp;&emsp;'))
pval_qqplot(care$pval, xlim = 7, ylim = 7, title = '')
Nasp = scale(resid(lm(N.Acetyl.L.Aspartic.acid ~ sex + age + PC1 + PC2 + PC3 + race1c.y + site, data = v5)))
asp = scale(resid(lm(Aspartic.acid ~ sex + age + PC1 + PC2 + PC3 + race1c.y + site, data = v5)))
avp = data.frame(mtDNA = v5$mtDNA, N.Acetyl.L.Aspartic.acid = Nasp, Aspartic.acid = asp)
v5.nasp = ggplot(avp, aes(mtDNA, N.Acetyl.L.Aspartic.acid)) + geom_point()
v5.asp = ggplot(avp, aes(mtDNA, Aspartic.acid)) + geom_point()
v5.nasp + geom_smooth(method = 'lm')
v5.asp + geom_smooth(method = 'lm')
Nasp = scale(resid(lm(N.Acetyl.L.Aspartic.acid ~ sex + age + PC1 + PC2 + PC3 + race + site, data = v1)))
asp = scale(resid(lm(Aspartic.acid ~ sex + age + PC1 + PC2 + PC3 + race + site, data = v1)))
avp = data.frame(mtDNA = v1$mtDNA, N.Acetyl.L.Aspartic.acid = Nasp, Aspartic.acid = asp)
v1.nasp = ggplot(avp, aes(mtDNA, N.Acetyl.L.Aspartic.acid)) + geom_point()
v1.asp = ggplot(avp, aes(mtDNA, Aspartic.acid)) + geom_point()
oxal5 = scale(resid(lm(Oxalic.acid ~ sex + age + PC1 + PC2 + PC3 + race1c.y + site, data = v5)))
glyc5 = scale(resid(lm(Glyceric.acid ~ sex + age + PC1 + PC2 + PC3 + race1c.y + site, data = v5)))
avp = data.frame(mtDNA = v5$mtDNA, Oxalic.acid = oxal5, Glyceric.acid = glyc5)
oxalv5 = ggplot(avp, aes(mtDNA, oxal5)) + geom_point() + geom_smooth(method='lm')
glycv5 = ggplot(avp, aes(mtDNA, glyc5)) + geom_point() + geom_smooth(method='lm')
oxal + geom_smooth(method = 'lm')
glyc + geom_smooth(method = 'lm')
oxal5
oxalv5
glycv5
oxalv5
glycv5
# Filter so that only people with measurements for both visits are retained.
df %>% count(idno, sort = TRUE) -> count
atleast2 = subset(count, n == 2)
df = subset(df, idno %in% atleast2$idno)
tx_expr <- df[,which(colnames(df) %in% make.names(metablist))]
cov <- df[,which(colnames(df) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race', 'site'))]
SCORE <- df[,which(colnames(df) == "mtDNA")]
rcov <- dplyr::select(df, idno)
lmer.res = run.all.lmers(tx_expr, cov, rcov, make.names(metablist), SCORE)
care.lmer = dplyr::select(lmer.res, beta, SE, pval)
care.lmer = care.lmer[order(care.lmer$pval),]
care.lmer %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show.lmer
rownames(show.lmer) = rownames(care.lmer)
htmlTable(head(show.lmer, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show.lmer), '&emsp;&emsp;'))
pval_qqplot(care.lmer$pval, xlim = 3, ylim = 3, title = '')
adp
v5.nasp
v1.nasp
v5.nasp
v1.nasp
# functions for running:
run_lmer <- function(expr, cov, rcov, SCORE, omit.outlier = T, outlier_sd = 3) {
expr <- as.numeric(expr)
# expr <- scale(expr) # uncomment if you would like to scale expression
expr <- inv.norm.transform(expr) # uncomment if you would like to inverse normal transform expression
# Create dataframe
expr_cov <- cbind(SCORE, expr, cov, rcov)
# Create equation
lmer.form = paste0('expr~SCORE+',paste0(colnames(cov), collapse = '+'), '+', paste0('(1|',colnames(rcov), ')', collapse = '+'))
# Run mixed model
lmer.fit <- lmer(lmer.form, data = expr_cov, na.action = na.exclude)
# Get summary
lmer.fit.summary <- summary(lmer.fit)
# Get corr
cor_expr_score <-
cor(expr, SCORE)
# Capture beta, tval, standard error.
lmer.res_ <-
as.data.frame(t(coef(lmer.fit.summary)['SCORE',]))
# get pval christina's way:
pval <- pchisq(lmer.res_$`t value`^2,1,lower.tail=F)
lmer.res_ <-
cbind(lmer.res_, pval,
#      t(confint(lmer.fit)['SCORE', ]),
cor_expr_score)
return(as.matrix(lmer.res_))
}
run.all.lmers <- function(tx_expr, cov, rcov, gene.ids, SCORE, omit.outlier = T, num.cores = 10)
{
require(pbapply)
pboptions(type="txt")
require(lme4)
start = Sys.time()
lmer.res <-
pblapply(tx_expr,            # Expression vector list for `pbapply::pblapply`
run_lmer,             # This function
cov = cov,           # Covariate matrix (fixed effects), as desribed above
rcov = rcov,        # Covariates that are random varaibles
SCORE = SCORE,       # PRS
omit.outlier = omit.outlier,
cl = num.cores)      # Number of cores to parallelize over
end = Sys.time()
total.time.confint = end-start
total.time.confint
# total.time.noconfint = end-start
lmer.res <- simplify2array(lmer.res, higher=F)
rownames(lmer.res) <-
c(# 'intercept',
'beta',
'SE',
't_value',
'pval',
# 'conf.low',
# 'conf.high',
'corr.rho')
colnames(lmer.res) <- gene.ids
lmer.res <- as.data.frame(t(lmer.res))
return(lmer.res)
}
metablist = readRDS('/Volumes/JHPCE/dcs01/active/projects/mesa/syang/R_objects/mesa.metab.list.rds')
v1 = subset(df, Exam == 1)
tx_expr <- v1[,which(colnames(v1) %in% make.names(metablist))]
cov <- v1[,which(colnames(v1) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race1c', 'site'))]
SCORE <- v1[,which(colnames(v1) == "mtDNA")]
lm.res = run.all.lms(tx_expr, cov, make.names(metablist), SCORE)
care = dplyr::select(lm.res, beta, SE, pval)
care = care[order(care$pval),]
care %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show
rownames(show) = rownames(care)
htmlTable(head(show, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show), '&emsp;&emsp;'))
pval_qqplot(care$pval, xlim = 3, ylim = 3, title = '')
v5 = subset(df, Exam == 5)
tx_expr <- v5[,which(colnames(v5) %in% make.names(metablist))]
cov <- v5[,which(colnames(v5) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race1c.y', 'site'))]
SCORE <- v5[,which(colnames(v5) == "mtDNA")]
lm.res = run.all.lms(tx_expr, cov, make.names(metablist), SCORE)
care = dplyr::select(lm.res, beta, SE, pval)
care = care[order(care$pval),]
care %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show
rownames(show) = rownames(care)
htmlTable(head(show, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show), '&emsp;&emsp;'))
pval_qqplot(care$pval, xlim = 5, ylim = 5, title = '')
Nasp = scale(resid(lm(N.Acetyl.L.Aspartic.acid ~ sex + age + PC1 + PC2 + PC3 + race1c.y + site, data = v5)))
asp = scale(resid(lm(Aspartic.acid ~ sex + age + PC1 + PC2 + PC3 + race1c.y + site, data = v5)))
Nasp
avp = data.frame(mtDNA = v5$mtDNA, N.Acetyl.L.Aspartic.acid = Nasp, Aspartic.acid = asp)
v5.nasp = ggplot(avp, aes(mtDNA, N.Acetyl.L.Aspartic.acid)) + geom_point()
v5.asp = ggplot(avp, aes(mtDNA, Aspartic.acid)) + geom_point()
v5.nasp
# Filter so that only people with measurements for both visits are retained.
df %>% count(idno, sort = TRUE) -> count
atleast2 = subset(count, n == 2)
df = subset(df, idno %in% atleast2$idno)
tx_expr <- df[,which(colnames(df) %in% make.names(metablist))]
cov <- df[,which(colnames(df) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race', 'site'))]
SCORE <- df[,which(colnames(df) == "mtDNA")]
rcov <- dplyr::select(df, idno)
lmer.res = run.all.lmers(tx_expr, cov, rcov, make.names(metablist), SCORE)
care.lmer = dplyr::select(lmer.res, beta, SE, pval)
care.lmer = care.lmer[order(care.lmer$pval),]
care.lmer %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show.lmer
rownames(show.lmer) = rownames(care.lmer)
htmlTable(head(show.lmer, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show.lmer), '&emsp;&emsp;'))
pval_qqplot(care.lmer$pval, xlim = 3, ylim = 3, title = '')
tx_expr <- df[,which(colnames(df) %in% make.names(metablist))]
cov <- df[,which(colnames(df) %in% c('sex', 'age', 'PC1', 'PC2', 'PC3', 'race', 'site', 'bmi'))]
SCORE <- df[,which(colnames(df) == "mtDNA")]
rcov <- dplyr::select(df, idno)
lmer.res = run.all.lmers(tx_expr, cov, rcov, make.names(metablist), SCORE)
care.lmer = dplyr::select(lmer.res, beta, SE, pval)
care.lmer = care.lmer[order(care.lmer$pval),]
care.lmer %>% mutate(beta = formatbeta(beta), SE = formatbeta(SE), pval = formatpval(pval)) -> show.lmer
rownames(show.lmer) = rownames(care.lmer)
htmlTable(head(show.lmer, 10), tfoot="&dagger; Bonferroni cutoff = 5.9e-4", header = paste0('&emsp;&emsp;', colnames(show.lmer), '&emsp;&emsp;'))
pval_qqplot(care.lmer$pval, xlim = 3, ylim = 3, title = '')
ggplot(df, aes(mtDNA, Taurine)) + geom_point()
load('/Volumes/JHPCE/dcs01/active/projects/GTeX/syang/look.version8/Whole Blood/with.gene.rds')
with.gene[grep('^PPARGC1A$', with.gene$symbol),]
# pure replication genes:
polg <- with.gene[grep('^POLG$', with.gene$symbol),]
polg2 <- with.gene[grep('^POLG2$', with.gene$symbol),]
twnk <- with.gene[grep('TWNK', with.gene$symbol),]
ssbp1 <- with.gene[grep('SSBP1', with.gene$symbol),]
primpol <- with.gene[grep('PRIMPOL', with.gene$symbol),]
dna2 <- with.gene[grep('DNA2', with.gene$symbol),]
mgme1 <- with.gene[grep('MGME1', with.gene$symbol),]
rnaseh <- with.gene[grep('^RNASEH1$', with.gene$symbol),]
# mtDNA transcription genes:
tfam <- with.gene[grep('TFAM', with.gene$symbol),]
tefm <- with.gene[grep('TEFM', with.gene$symbol),]
tfb2m <- with.gene[grep('TFB2M', with.gene$symbol),]
polrmt <- with.gene[grep('^POLRMT$', with.gene$symbol),]
# nucleotide metabolism genes:
tk2 <- with.gene[grep('^TK2', with.gene$symbol),]
dguok <- with.gene[grep('^DGUOK$', with.gene$symbol),]
rrm2b <- with.gene[grep('RRM2B', with.gene$symbol),]
tymp <- with.gene[grep('TYMP', with.gene$symbol),]
slc <- with.gene[grep('^SLC25A4$', with.gene$symbol),]
lonp <- with.gene[grep('^LONP1$', with.gene$symbol),]
load('/Volumes/JHPCE/dcs01/active/projects/GTeX/syang/look.version8/R_objects/rna.dna.cor.SCALED.rds')
blood <- subset(rna.dna.cor, Tissue == 'Whole Blood')
blood$symbol <- 'Scaled mtRNA median'
all.mt.reg <- plyr::rbind.fill(blood, polg, polg2, twnk, ssbp1, primpol, dna2, mgme1, rnaseh, tfam, tefm, tfb2m, polrmt, tk2, dguok, rrm2b, tymp, slc)
important <- dplyr::select(all.mt.reg, symbol, beta, SE, pval)
library(knitr)
knitr::kable(important)
important.save <- important
important$beta <- formatC(round(important$beta, 2), 2, format = "f")
important$SE <- formatC(round(important$SE, 2), 2, format = "f")
to.show <- important
to.show$pval2 <- ifelse(to.show$pval > 0.001, format(round(to.show$pval, 3), nsmall = 3), formatC(to.show$pval, format = "e", digits = 2))
to.show$pval2 <- ifelse(to.show$pval > 0.05, format(round(to.show$pval, 2), nsmall = 2), to.show$pval2)
important = dplyr::select(to.show, symbol, beta, SE, pval2)
important2 <- important[order(important$pval, decreasing = F),]
library(stargazer)
library(kableExtra)
kable_styling(kable(important, row.names = F), full_width = F)
kable_styling(kable(important2, row.names = F), full_width = F)
library(htmlTable)
output <-
matrix(paste("Content", LETTERS[1:16]),
ncol=3, byrow = TRUE)
library(htmlTable)
htmlTable(important,
header =  c('Gene', '&emsp;&emsp;&emsp;Effect estimate&emsp;&emsp;&emsp;', 'Standard error', '&emsp;P-value&emsp;'),
rnames = rep('', nrow(important)),
rgroup = c('', "mtDNA replication machinery",
"mtDNA transcription machinery",
"Nucleotide metabolism genes"),
n.rgroup = c(1,8,4,4),
tfoot="&dagger; Genes from Rusecka et. al", compatability = 'htmlTableCompat')
library(qusage)
kegg.sets <- read.gmt('/Volumes/JHPCE/dcs01/active/projects/GTeX/syang/look.version8/GO_enrich/MsigDB/c2.cp.kegg.v7.0.symbols.gmt')
grep('^KEGG_SPLICEOSOME$', names(kegg.sets))
grep('^KEGG_UBIQUITIN_MEDIATED_PROTEOLYSIS$', names(kegg.sets))
spl.genes <- kegg.sets[[77]]
ubq.genes <- kegg.sets[[96]]
spl <- subset(with.gene, symbol %in% spl.genes)
ubq <- subset(with.gene, symbol %in% ubq.genes)
ggplot(spl, aes(-log10(pval))) + geom_rug() + geom_density()
library(ggplot2)
mt <- with.gene[grep('^MT-', with.gene$symbol),]
require(forestplot)
require(meta)
# get means and upper/lower limits
means <- mt$beta
upper1 <- mt$beta+mt$SE
lower1 <- mt$beta-mt$SE
# formatting
means <- c(NA, means, NA)
se <- c(NA, mt$SE, NA)
upper <- c(NA, upper1, NA)
lower <- c(NA, lower1, NA)
# Dan wants p-value...
mt$pval2 <- ifelse(mt$pval > 0.001, format(round(mt$pval, 3), nsmall = 3), formatC(mt$pval, format = "e", digits = 2))
mt$pval2 <- ifelse(mt$pval > 0.05, format(round(mt$pval, 2), nsmall = 2), mt$pval2)
# create table text
text <-cbind(c("MT-encoded genes", mt$symbol, NA), c("Effect estimate", formatC(round(mt$beta, 2), 2, format = "f"), NA), c("Standard error", formatC(round(mt$SE, 2), 2, format = "f"), NA), c("P-value", mt$pval2, NA))
# draw forestplot
pdf(paste0('~/Desktop/mtrna.forestplot.pdf'), width = 10, height = 8, onefile = F)
xticks = seq(from = -0.05, to = 0.3, by = 0.05)
forestplot(text, means, lower, upper, col=fpColors(box="royalblue",line="darkblue", summary="royalblue"), is.summary=c(TRUE, FALSE, FALSE, rep(FALSE, nrow(mt)+1), TRUE),
txt_gp = fpTxtGp(ticks=gpar(cex=0.9), xlab = gpar(fontfamily = "HersheySerif")), new_page = TRUE, xticks = xticks)
#
# txt_gp = fpTxtGp(label = list(gpar(fontface = 3),
#                                      gpar(fontface = 1,
#                                           )),
#                         ticks = gpar(fontfamily = "", cex=1),
#                         xlab  = gpar(fontfamily = "HersheySerif", cex = 1.5)),
#        rbind(HRQoL$Sweden),
#        col=clrs,
xlab="EQ-5D index")
mt.care <- dplyr::select(mt, symbol, beta, pval)
dev.off()
dev.off()
xticks = seq(from = -0.05, to = 0.3, by = 0.05)
forestplot(text, means, lower, upper, col=fpColors(box="royalblue",line="darkblue", summary="royalblue"), is.summary=c(TRUE, FALSE, FALSE, rep(FALSE, nrow(mt)+1), TRUE),
txt_gp = fpTxtGp(ticks=gpar(cex=0.9), xlab = gpar(fontfamily = "HersheySerif")), new_page = TRUE, xticks = xticks)
forestplot(text, means, lower, upper, col=fpColors(box="royalblue",line="darkblue", summary="royalblue"), is.summary=c(TRUE, FALSE, FALSE, rep(FALSE, nrow(mt)+1), TRUE),
txt_gp = fpTxtGp(ticks=gpar(cex=0.9), label = gpar(fontface = 1)), new_page = TRUE, xticks = xticks)
forestplot(text, means, lower, upper, col=fpColors(box="royalblue",line="darkblue", summary="royalblue"), is.summary=c(TRUE, FALSE, FALSE, rep(FALSE, nrow(mt)+1), TRUE),
txt_gp = fpTxtGp(ticks=gpar(cex=0.9), label = gpar(fontface = 1)), new_page = TRUE, xticks = xticks)
forestplot(text, means, lower, upper, col=fpColors(box="royalblue",line="darkblue", summary="royalblue"), is.summary=c(TRUE, FALSE, FALSE, rep(FALSE, nrow(mt)+1), TRUE),
txt_gp = fpTxtGp(ticks=gpar(cex=0.9)), new_page = TRUE, xticks = xticks)
xticks = seq(from = -0.05, to = 0.3, by = 0.05)
forestplot(text, means, lower, upper, col=fpColors(box="royalblue",line="darkblue", summary="royalblue"), is.summary=c(TRUE, FALSE, FALSE, rep(FALSE, nrow(mt)+1), TRUE),
txt_gp = fpTxtGp(ticks=gpar(cex=0.9)), new_page = TRUE, xticks = xticks)
forestplot(text, means, lower, upper, col=fpColors(box="royalblue",line="darkblue", summary="royalblue"), is.summary=c(TRUE, FALSE, FALSE, rep(FALSE, nrow(mt)+1), TRUE),
txt_gp = fpTxtGp(ticks=gpar(cex=0.9)), new_page = TRUE, xticks = xticks)
